## Do the matching
options(scipen = 8, digits = 4, width = 132)

library(nbpMatching)
library(RItools)
library(optmatch)
library(MASS)
library(parallel)
library(dplyr)
library(designmatch)

source(here::here("Design", "nonbimatchingfunctions.R"))

load(here::here("Data", "wrkdatOwnMap_anyDA_new.rda"), verbose = TRUE)
load(here::here("Design", "dist_mats_anyDA_Diversity_new.rda"), verbose = TRUE)

wdat0 <- wrkdatOwnMap_new %>%
  filter(!is.na(vm_change) & !is.na(social.capital01) & !is.na(community.resp01) & !is.na(da_diversity_index)) %>%
  droplevels()
dim(wdat0)

## Make simple objective distance (maybe move this to dist_mats_data_anyData_new.R because it overwrites that divdaDist object)
divdaDist <- scalar.dist(wdat0$da_diversity_index, scalefactor = 100000) ## the matching software wants integers
dimnames(divdaDist) <- list(row.names(wdat0), row.names(wdat0))

## What are the kinds of differences we would see within pair?
## to create near and far criteria for the matching
# perceptions_mat <- scalar.dist(wdat0$subjcomm.diversity.index, int = FALSE)
## vm_change_mat <- scalar.dist(wdat0$vm_change, int = FALSE)
# quantile(as.vector(perceptions_mat), seq(0, .1, .01))
# quantile(as.vector(vm_change_mat), seq(0, 1, .1))
# quantile(as.vector(divdaDist) / 100000, seq(0, 1, .1))

## Match exactly within csd type rural vs urban
canada_exact <- list(covs = as.matrix(wdat0$csd_urban_06))

## Require matches to be within .02 on da vm and .03 on change in vm 2016 - 2006
canada_nearlist <- list(
  covs = as.matrix(wdat0[, c("da_diversity_index", "vm_change")]),
  pairs = c(
    da_diversity_index = .7,
    vm_change = .025
  )
)

## What is the smallest non zero difference in perceptions --- for the "far" constraint
# min_perc_dist <- min(as.vector(perceptions_mat)[as.vector(perceptions_mat) != 0])
min_perc_dist <- .0001
canada_farlist <- list(
  covs = as.matrix(wdat0[, "subjcomm.diversity.index"]),
  pairs = c(subjcomm.diversity.index = min_perc_dist)
)

solverlist <- list(name = "gurobi", approximate = 1, t_max = 1000, trace = 1)

system.time(
  res <- nmatch(
    dist_mat = divdaDist,
    near = canada_nearlist,
    far = canada_farlist,
    exact = canada_exact,
    # subset_weight = 1,
    solver = solverlist,
    total_pairs = 2550
  )
)

## dropped
## 2518 with subset_weight=1 p=.26 (1934 pairs)
## 1640 with subset_weight=NULL total_pairs=2500 p=.91
## 1538 with subset_weight=NULL total_pairs=2550 p=.82
## 1260 with subset_weight=NULL total_pairs=2720 p=.64
## 1216 with subset_weight=NULL total_pairs=2740 p=.66
## 1194 with subset_weight=NULL total_pairs=2750 p=.32
## 1112 with subset_weight=NULL total_pairs=2800 p=.44
##  934 with subset_weight=NULL total_pairs=2900 p=.45
## 1290 with subset_weight=NULL total_pairs=2700 p=.24
## 1382 with subset_weight=NULL total_pairs=2650 p=.60
## 1386 with subset_weight=NULL total_pairs=2650 p=.63 (highs)
## 1194 with subset_weight=NULL total_pairs=2600 p=.60

## Convert nmatch output into a vector for use in analysis
wdat0$id <- row.names(wdat0)
canada_pairs_df <- nmatch_to_df(res, origid = wdat0$id)

# The nmatch_to_df function creates a column labeled "pair" which contains
## the matched set indicator
wdat <- inner_join(wdat0, canada_pairs_df, by = "id")
wdat <- droplevels(wdat)
stopifnot(nrow(wdat) == nrow(canada_pairs_df))
stopifnot(length(unique(wdat$pair)) == nrow(wdat) / 2)

## Checking to see that we didn't match any two people with identical perceptions
pairpercdiffs <- tapply(wdat$subjcomm.diversity.index, wdat$pair, function(x) {
  abs(diff(x))
})
stopifnot(min(pairpercdiffs) > 0)

## This next looks at the continuous relationship with the variables that we
## really want to hold constant

thecovs <- c("da_diversity_index", "vm_change")
balfmla <- reformulate(thecovs, response = "subjcomm.diversity.index")
## xb1 <- xBalance(balfmla, strata = list(pair = ~pair), report = "all", data = wdat)
## xb1$overall
## xb1$results[, c("z", "p"), "pair"]

## Easier to think about "higher perceiver" versus "lower perceiver" when it comes to asking whether the matching worked.
wdat <- wdat %>%
  group_by(pair) %>%
  mutate(perc_more = rank(subjcomm.diversity.index) - 1) %>%
  ungroup()

## Further simplify the working file
wdat1 <- wdat %>%
  mutate(pairF = factor(pair)) %>%
  select(
    perc_more, da_prop_vm_20pct_06, vm_change, pair, pairF, social.capital01, community.resp01,
    subjcomm.diversity.index, vcid, dauid, csd_pop_06, csd_prop_vm_20pct_06,
    da_pop_dens_06, csd_urban_06, csd_pop_dens_06, community_area_km, da_diversity_index
  )

xb1_ranked0 <- xBalance(perc_more ~ da_diversity_index + vm_change,
  strata = list(unstr = NULL, pair = ~pair), report = "all", data = wdat1
)
xb1_ranked0$results
xb1_ranked0$overall
nrow(wdat0)
length(res$group_id)
nrow(wdat0) - length(res$group_id)


# balfmla_ranked <- update(balfmla, perc_more ~ .)
# xb1_ranked <- balanceTest(update(balfmla_ranked, . ~ . + strata(pair)), data = wdat1, p.adjust = "none")
# xb1_ranked$overall
# xb1_ranked$results[, c("Control", "Treatment", "adj.diff", "std.diff", "p"), ] # , "pair"]
# length(res$group_id)


## An alternative for the overall test which should be fairly close to it
library(formula.tools)
library(coin)
coin_fmla <- ~ subjcomm.diversity.index | pairF
lhs(coin_fmla) <- rhs(balfmla)
coin_test <- independence_test(coin_fmla, data = wdat1, teststat = "quadratic")
coin_test_perm <- independence_test(coin_fmla, data = wdat1, teststat = "quadratic", distribution = approximate(nresample = 1000))
coin::pvalue(coin_test)
coin::pvalue(coin_test_perm)

coin_fmla_rank <- da_diversity_index + vm_change ~ perc_more | pairF
coin_test_rank <- independence_test(coin_fmla_rank, data = wdat1, teststat = "quadratic")
coin_test_rank_perm <- independence_test(coin_fmla_rank,
  data = wdat1,
  teststat = "quadratic", distribution = approximate(nresample = 1000)
)
coin::pvalue(coin_test_rank)
coin::pvalue(coin_test_rank_perm)

pairdiffs <- wdat1 %>%
  group_by(pair) %>%
  summarize(
    da_diversity_index = abs(diff(da_diversity_index)),
    vm_change = abs(diff(vm_change)),
    da_prop_vm_20pct_06 = abs(diff(da_prop_vm_20pct_06)),
    subjcomm.diversity.index = abs(diff(subjcomm.diversity.index)),
    csd_pop_06 = abs(diff(csd_pop_06)),
    csd_pop_dens_06 = abs(diff(csd_pop_dens_06)),
    csd_prop_vm_20pct_06 = abs(diff(csd_prop_vm_20pct_06)),
    community.area.km = abs(diff(community_area_km))
  )

sapply(pairdiffs, summary)

sapply(pairdiffs, function(x) {
  quantile(x, sort(unique(c(seq(0, 1, .1), seq(.9, 1, .01)))), na.rm = TRUE)
})

matches_anyDA_Diversity_new <- wdat1[, c("vcid", "pair")]

save(matches_anyDA_Diversity_new, file = here::here("Design", "matches_anyDA_Diversity_new.rda"))
